import re

class TextPreprocessor:
    """Normalize raw text by dropping filler words and stray punctuation.

    ``stop_words`` is a plain, mutable set of strings; every entry is removed
    from the input wherever it occurs as a substring. Entries may be longer
    than one character.
    """

    # Matches everything that is NOT a word character (digits, letters,
    # underscore, CJK), whitespace, or an ASCII colon. The colon is kept
    # because it is used in order numbers.
    _DISALLOWED_CHARS = re.compile(r'[^\w\u4e00-\u9fa5\s:]')
    # Matches a run of whitespace, so it can be collapsed to a single space.
    _WHITESPACE_RUN = re.compile(r'\s+')

    def __init__(self):
        """Initialize the preprocessor with the default filler-word set."""
        self.stop_words = {
            "我的", "一下", "嗯", "呀", "吗", "啊", "呢", "啦", "吧", "哈",
            "哦", "哟", "哇", "啥", "哪", "那", "这", "的", "了",
        }

    def _remove_stop_words(self, text: str) -> str:
        """Return *text* with every occurrence of a stop word deleted."""
        # Ignore empty entries: an empty alternative would match everywhere.
        candidates = [word for word in self.stop_words if word]
        if not candidates:
            return text
        # Longest first, so "我的" is consumed as a unit before the shorter
        # "的" gets a chance to match inside it.
        candidates.sort(key=len, reverse=True)
        # Built per call from the current set so that callers who modify
        # ``stop_words`` after construction are honoured.
        pattern = '|'.join(re.escape(word) for word in candidates)
        return re.sub(pattern, '', text)

    def clean_text(self, text: str) -> str:
        """Strip stop words and punctuation from *text*.

        Steps, in order:
          1. delete every stop word (matched as a substring, so
             multi-character entries work);
          2. replace each character that is not a word character,
             whitespace, or ``:`` with a space;
          3. collapse whitespace runs to one space and trim both ends.

        Args:
            text: The raw input string. ``None`` or an empty string is
                treated as empty input.

        Returns:
            The cleaned string; ``""`` for empty input.
        """
        if not text:
            return ''

        without_fillers = self._remove_stop_words(text)
        spaced = self._DISALLOWED_CHARS.sub(' ', without_fillers)
        return self._WHITESPACE_RUN.sub(' ', spaced).strip()